In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import gym
import numpy as np
from gym.envs.registration import register
from gym import wrappers
import shutil
In [2]:
# register(
# id='FrozenLakeNotSlippery-v0',
# entry_point='gym.envs.toy_text:FrozenLakeEnv',
# kwargs={'map_name' : '4x4', 'is_slippery': False},
# max_episode_steps=100,
# reward_threshold=0.78, # optimum = .8196
# )
# env = gym.make('FrozenLakeNotSlippery-v0')
In [5]:
# Build the stock 'FrozenLake-v0' environment. The custom
# 'FrozenLakeNotSlippery-v0' registration in the previous cell is commented
# out, so it is not the environment used by the cells below.
env = gym.make('FrozenLake-v0')
In [6]:
# Tabular Q-learning with an epsilon-greedy behaviour policy.
#
# Outputs left in the notebook namespace for later cells:
#   Q            -- (n_states, n_actions) table of learned action values
#   reward_list  -- raw environment reward of the final step of every episode

# Seed both random sources so that re-running this cell reproduces the same run.
SEED = 0
np.random.seed(SEED)  # the exploration draws below use NumPy's global RNG
env.seed(SEED)        # the environment keeps its own RNG

Q = np.zeros((env.observation_space.n, env.action_space.n))  # 16x4
alpha = 0.1           # learning rate: weight given to the new target
beta = 0.001          # probability of taking a uniformly random action
gamma = 0.95          # discount factor applied to the next state's value
num_episodes = 1000
window = 100          # number of episodes summed per point of the plot
reward_list = []

for i in range(num_episodes):
    s = env.reset()
    done = False
    while not done:
        # Epsilon-greedy action selection (np.argmax picks the first maximum
        # on ties).
        if np.random.rand() < beta:
            a = np.random.randint(env.action_space.n)
        else:
            a = np.argmax(Q[s, :])
        s_next, reward, done, info = env.step(a)

        # `done` is raised both when the episode really ends (goal or hole)
        # and when the step limit cuts it short. Only a real ending may be
        # turned into the shaped +1 / -1 reward; a time-out is an ordinary
        # step. gym's TimeLimit wrapper reports a cut-off through
        # info['TimeLimit.truncated']; on versions that do not set this key
        # the lookup falls back to False and the old behaviour is kept.
        timed_out = bool(info.get('TimeLimit.truncated', False))
        if done and not timed_out:
            # episode really ended: reached the goal or fell into a hole
            r = 1.0 if reward > 0.0 else -1.0
        else:
            # ordinary step, or the episode was cut off by the step limit
            r = 0.0

        # Q rows are only ever written for the state an action was taken
        # from, and the episode stops as soon as `done` is raised. Rows of
        # states that end an episode therefore stay at zero (as long as no
        # episode starts in one), so the bootstrap term vanishes for them.
        Q[s, a] = (1 - alpha) * Q[s, a] + alpha * (r + gamma * np.max(Q[s_next, :]))
        s = s_next
    # Track the unshaped reward of the last step as the episode outcome.
    reward_list.append(reward)

# Moving SUM (not mean) of the final rewards over `window` consecutive episodes.
fig, ax = plt.subplots()
ax.set(
    xlabel="Window index (episode - %d)" % (window - 1),
    ylabel="Sum of final rewards over %d episodes" % window,
    title="Q-learning on FrozenLake-v0",
)
ax.plot(np.convolve(np.ones(window), reward_list, "valid"))
Out[6]:
In [10]:
# Show the learned table under a heading; the two items are written on
# separate lines, exactly as two consecutive print calls would.
print("Final Q-Table Values", Q, sep="\n")
In [12]:
# Greedy rollout of the learned table: play one episode, always taking the
# highest-valued action in Q (no random exploration), and count the steps.
s = env.reset()
d=False
n=0
while d==False:
    n+=1
    a = np.argmax(Q[s,:])
    s,r,d,x = env.step(a)  # r (reward) and x (info) are not used afterwards
    #print("%s %s %s %s"%(s,r,d,x))
    # NOTE(review): the exported notebook lost its indentation, so it is
    # unclear whether render() ran once per step (as laid out here) or only
    # once after the loop -- confirm against the original .ipynb.
    env.render()
print(n)  # number of steps taken before the episode ended
In [9]:
env.close()
# The upload call below is disabled. An API key used to be hardcoded on this
# line; it has been redacted here and should be treated as leaked and rotated.
# If the call is ever re-enabled, read the key from the environment
# (needs `import os`) instead of committing it to the notebook:
#gym.upload('/tmp/FrozenLake_01', api_key=os.environ['OPENAI_GYM_API_KEY'])
In [ ]: